/*************************************************
author: RuanShengQiang
date: 2017/3/24
**************************************************/
// GLSL-style spellings for the OpenCL float vector types.
#define vec2 float2
#define vec3 float3
#define vec4 float4
// GLSL-style colour swizzles: ".rgb" / ".rgba" expand to ".xyz" / ".xyzw".
// NOTE: these are plain token replacements, so any identifier spelled exactly
// "rgb" or "rgba" in this file is rewritten as well.
#define rgb xyz
#define rgba xyzw
#define PI 3.141592653589f

// Sampler shared by every image read in this file: normalized coordinates,
// out-of-range coordinates clamped to the edge texel, linear filtering.
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;

// Tiles the picture: shifts uv by half a unit, multiplies it by "amp" and
// keeps the remainder modulo 1, so the coordinate wraps "amp" times per unit
// of uv. fmod truncates toward zero, so a negative input yields a negative
// (or zero) remainder.
static vec2 scale(vec2 uv, float amp)
{
	// fmod is declared as fmod(gentype, gentype); the divisor is widened to a
	// vector explicitly instead of relying on an implicit scalar-to-vector
	// conversion of the argument.
	return fmod((uv + (vec2)(0.5f)) * amp, (vec2)(1.0f));
}
// Rotates the offset (uv - center) by the angle theta (radians) and returns
// the rotated offset. The center is NOT added back to the result.
static vec2 _rotate(vec2 uv, vec2 center, float theta)
{
	const float c = cos(theta);
	const float s = sin(theta);
	const vec2 offset = uv - center;

	// Rows of the 2x2 rotation matrix applied to the offset.
	return (vec2)(dot((vec2)(c, -s), offset),
	              dot((vec2)(s, c), offset));
}

// Rescales the offset of uv from "center" by
// 1 / (strength*r*r + strength*r + 1) and wraps the resulting coordinate
// with period 1/amp (fmod, i.e. remainder truncated toward zero).
static vec2 warp(vec2 uv, vec2 center, float strength, float r, float amp)
{
	const vec2 warped = center + (uv - center) / (strength*r*r + strength*r + 1.0f);

	// fmod is declared as fmod(gentype, gentype); the divisor is widened to a
	// vector explicitly instead of relying on an implicit scalar-to-vector
	// conversion of the argument.
	return fmod(warped, (vec2)(1.0f / amp));
}
// Component-wise floored modulo: x - y * floor(x / y).
// It differs from fmod, which truncates the quotient toward zero instead of
// flooring it.
static vec2 myMod(vec2 x, float y)
{
	const vec2 wholePeriods = floor(x / y);
	return x - y * wholePeriods;
}

// Mirror-repeat addressing: folds an arbitrary coordinate into [0, 1) by
// reflecting it in every odd unit cell, per component.
static vec2 mirror(vec2 uv)
{
	// 0 in even unit cells, 1 in odd ones.
	const vec2 parity = myMod(floor(uv), 2.0f);
	// +1 keeps the direction (even cell), -1 reverses it (odd cell).
	const vec2 flip = -2.0f * parity + 1.0f;
	// Position inside the current unit cell.
	const vec2 within = myMod(uv, 1.0f);

	return myMod(2.0f * parity + flip * within, 1.0f);
}

// Samples src_data at texture coordinate tc after remapping it through
// param->origROI: the coordinate is multiplied by origROI[2..3] and offset by
// origROI[0..1] (presumably the region-of-interest size and origin -- confirm
// against the FilterParam definition). The y axis is flipped (1 - y) before
// the image is read with the file-level sampler.
static vec4 INPUT(image2d_t src_data, __global FilterParam* param, vec2 tc)
{
	const vec2 roiOffset = (vec2)(param->origROI[0], param->origROI[1]);
	const vec2 roiScale = (vec2)(param->origROI[2], param->origROI[3]);
	const vec2 mapped = tc * roiScale + roiOffset;

	return read_imagef(src_data, sampler, (vec2)(mapped.x, 1.0f - mapped.y));
}

// Directional blur: accumulates "Samples" taps of the input image along
// "dir", starting at uv + rollDir and advancing by dir * processMB / Samples
// per tap. Each tap position is folded back into the image with mirror().
//
// Returns the UN-normalized sum; the caller is expected to divide by the
// sample count. The accumulator is seeded with alpha = 1, so the returned
// alpha is 1 plus the sum of the sampled alphas.
// If Samples <= 0 no tap is taken and the seed colour (0, 0, 0, 1) is
// returned.
static float4 blur(image2d_t input, __global FilterParam* param, vec2 uv, vec2 dir, int Samples, float processMB, vec2 rollDir)
{
	// Step between two consecutive taps.
	const float2 diver = dir * processMB / (float)(Samples);
	vec4 color = (vec4)(0.0f, 0.0f, 0.0f, 1.0f);

	// Exactly "Samples" taps are taken, in the same order as before. The
	// previous two-taps-per-iteration loop read one extra tap whenever
	// Samples was odd.
	for (int i = 0; i < Samples; ++i)
	{
		color += INPUT(input, param, mirror(uv + (float)(i) * diver + rollDir));
	}
	return color;
}


// Transition kernel: writes one output pixel per work-item.
//
// progress = cur_time / total_time selects the phase:
//   * progress <  0.2 : input1 is blurred and shifted,
//   * otherwise       : input2 is blurred and shifted.
// In both phases the shift ("roll") is a fitted 6th-degree polynomial of time
// and the blur strength is a 5th-degree polynomial whose coefficients match
// the derivative of the roll polynomial up to rounding. Both the roll and the
// blur direction are rotated by 45 degrees, so the motion runs diagonally.
//
// globalBlur, get_global_id0 and get_global_id1 are defined outside this
// block; the result of globalBlur is treated here as a sum over "Samples"
// taps and is divided by Samples before being written.
__kernel void MAIN(__read_only image2d_t input1, __read_only image2d_t input2, __write_only image2d_t dstImg,__global FilterParam* param)
{
	const int Samples = 16; // multiple of 2
	const float rotateDeta = PI/180.0f*(45.0f);

	// NOTE(review): if cur_time / total_time are integer fields this is an
	// integer division -- confirm against the FilterParam definition.
	const float progress = param->cur_time / param->total_time;

	const int W = get_global_size(0);
	const int H = get_global_size(1);
	const int textH = param->height[2];
	const int w = get_global_id(0);
	const int h = get_global_id(1);

	const float2 resolution = (float2)(W, H);
	const vec2 fragCoord = (vec2)(get_global_id0( param), get_global_id1( param));
	// Pixel centre in normalized [0, 1] coordinates.
	const vec2 uv = (fragCoord + (vec2)(0.5f)) / resolution;

	// Blur direction: the unit y axis rotated by 45 degrees (same in both phases).
	const vec2 dir = _rotate((vec2)(0.0f, 1.0f), (vec2)(0.0f), rotateDeta);

	vec2 rollDir;
	vec4 color;

	if (progress < 0.2f)
	{
		// Powers of the raw progress value.
		const float x = progress;
		const float x2 = x*x;
		const float x3 = x2*x;
		const float x4 = x3*x;
		const float x5 = x4*x;
		const float x6 = x5*x;

		rollDir = -(vec2)( 0.0f, 1.0E+06f*x6 - 641535.0f*x5 + 138013.0f*x4 - 13705.0f*x3 + 622.28f*x2 - 5.0144f*x + 0.00481f )*1.7f;
		const float blurAmount = -clamp( (6.00E+06f*x5-3.21E+06f*x4+5.52E+05f*x3-4.11E+04f*x2+1.24E+03f*x-5.01E+00f)*0.003f,-0.2f,0.2f);

		// 1.414f ~ sqrt(2): shortens the rotated roll vector.
		rollDir = _rotate(rollDir, (vec2)(0.0f), rotateDeta)/1.414f;
		color = globalBlur(input1, param, uv, dir, Samples, blurAmount, rollDir, resolution);
	}
	else
	{
		// Powers of the progress value shifted by the fit's time offset.
		const float x = progress - 0.3361f;
		const float x2 = x*x;
		const float x3 = x2*x;
		const float x4 = x3*x;
		const float x5 = x4*x;
		const float x6 = x5*x;

		rollDir = -(vec2)( 0.0f,-148.19f*x6 + 238.72f*x5 + 25.04f*x4 - 256.21f*x3 + 180.65f*x2 - 47.609f*x + 3.9918f)*0.5f;
		float blurAmount = (-888.6f*x5+1193.6f*x4+100.16f*x3-768.63f*x2+361.3f*x -47.609f)*0.02f;
		// No blur once progress has passed 0.587.
		if (progress > 0.5870f)
		{
			blurAmount = 0.0f;
		}

		// 1.414f ~ sqrt(2): shortens the rotated roll vector.
		rollDir = _rotate(rollDir, (vec2)(0.0f), rotateDeta)/1.414f;
		color = globalBlur(input2, param, uv, dir, Samples, blurAmount, rollDir, resolution);
	}

	// Output rows are written bottom-up relative to the work-item's y index.
	write_imagef(dstImg, (int2)(w, textH - h -1), color/(float)(Samples));
}
